'''
Author: zhanwei xu
Date: 2023-05-27 17:22:45
LastEditors: zhanwei xu
LastEditTime: 2023-05-28 21:50:30
Description: 

Copyright (c) 2023 by zhanwei xu, Tsinghua University, All Rights Reserved. 
'''

import requests
import functools
import asyncio
from concurrent.futures import ThreadPoolExecutor
import ssl
from bs4 import BeautifulSoup
import urllib.parse
# WARNING(security): globally disables HTTPS certificate verification for all
# urllib-based connections in this process (vulnerable to MITM). Presumably a
# workaround for a local proxy/cert issue — confirm before shipping.
ssl._create_default_https_context = ssl._create_unverified_context


class BingSearch:
    """Search the web through the Bing Web Search API, rotating over API keys.

    Keys are loaded from ``bing_keys.txt`` (one key per line) and handed out
    round-robin by :meth:`get_key` so quota is spread across them.
    """

    def __init__(self):
        # Browser-like User-Agent so plain page fetches are not rejected.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35"
        }
        # NOTE(review): hard-coded proxy; currently unused by the methods below.
        self.proxies = {
            'http': 'http://162.212.157.238:8080',
            'https': 'http://162.212.157.238:8080'
        }
        self.cookies = ""
        # One API key per line; consumed round-robin by get_key().
        self.keys = self.get_bing_keys('bing_keys.txt')
        self.api_key_index = 0

    def get_bing_keys(self, filename):
        """Read API keys from *filename*, one per line, whitespace-stripped."""
        with open(filename, 'r') as f:
            return [line.strip() for line in f]

    def get_key(self):
        """Return the next API key, wrapping back to the first after the last."""
        key = self.keys[self.api_key_index]
        # Modulo advance replaces the manual reset-at-end check.
        self.api_key_index = (self.api_key_index + 1) % len(self.keys)
        return key

    def get_cookies(self):
        """Fetch the landing page's cookies as a plain dict."""
        # NOTE(review): the original assigned the Bing URL and immediately
        # overwrote it with Baidu's; the effective (Baidu) URL is kept here.
        url = 'https://www.baidu.com/'
        response = requests.get(url, headers=self.headers)
        return response.cookies.get_dict()

    async def search(self, keyword):
        """Query the Bing Web Search API for *keyword*.

        Returns a list of up to 3 dicts of the form
        ``{'href': <result url>, 'desc': <snippet>}``. On any network or API
        failure the partial (possibly empty) result list is returned —
        best-effort behavior, preserved from the original.
        """
        results = []
        params = {
            'q': keyword,
            'mkt': 'zh-CN',
            'responseFilter': 'Webpages',
            'setLang': 'zh-hans',
            'count': 3,
        }
        headers = {'Ocp-Apim-Subscription-Key': self.get_key()}
        endpoint = "https://api.bing.microsoft.com/v7.0/search"

        try:
            # requests is blocking; run it in a worker thread so the event
            # loop stays responsive.
            loop = asyncio.get_event_loop()
            request = functools.partial(requests.get, endpoint, headers=headers, params=params)
            with ThreadPoolExecutor() as executor:
                response = await loop.run_in_executor(executor, request)
            response.raise_for_status()
            data = response.json()
            # Guard against fewer than 3 results instead of indexing blindly
            # (the original `for i in range(3)` raised and relied on the
            # catch-all below to mask the IndexError).
            for page in data.get('webPages', {}).get('value', [])[:3]:
                results.append({'href': page['url'], 'desc': page['snippet']})
        except Exception:
            # Deliberate best-effort: a failed search yields what we have.
            pass
        return results

if __name__ == '__main__':
    # Manual smoke test: run one search and show what comes back.
    engine = BingSearch()
    hits = asyncio.run(engine.search('中国'))
    print(hits)
    print(len(hits))
